#STOCK ALLOCATION PROJECT v2 - Regression & LSTM

IMPORT DATASETS AND LIBRARIES

In [ ]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
In [ ]:
import pandas as pd
import plotly.express as px
from copy import copy
from scipy import stats
import matplotlib.pyplot as plt
import numpy as np
import plotly.figure_factory as ff
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from tensorflow import keras
In [ ]:
# Read stock prices data
stock_price_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/stocks.csv')
stock_price_df
Out[ ]:
Date AAPL BA T MGM AMZN IBM TSLA GOOG sp500
0 2012-01-12 60.198570 75.510002 30.120001 12.130000 175.929993 180.550003 28.250000 313.644379 1295.500000
1 2012-01-13 59.972858 74.599998 30.070000 12.350000 178.419998 179.160004 22.790001 311.328064 1289.089966
2 2012-01-17 60.671429 75.239998 30.250000 12.250000 181.660004 180.000000 26.600000 313.116364 1293.670044
3 2012-01-18 61.301430 75.059998 30.330000 12.730000 189.440002 181.070007 26.809999 315.273285 1308.040039
4 2012-01-19 61.107143 75.559998 30.420000 12.800000 194.449997 180.520004 26.760000 318.590851 1314.500000
... ... ... ... ... ... ... ... ... ... ...
2154 2020-08-05 440.250000 174.279999 29.850000 16.719999 3205.030029 125.449997 1485.020020 1473.609985 3327.770020
2155 2020-08-06 455.609985 172.199997 29.840000 18.459999 3225.000000 126.120003 1489.579956 1500.099976 3349.159912
2156 2020-08-07 444.450012 170.020004 30.020000 19.030001 3167.459961 124.959999 1452.709961 1494.489990 3351.280029
2157 2020-08-10 450.910004 179.410004 30.200001 21.650000 3148.159912 127.110001 1418.569946 1496.099976 3360.469971
2158 2020-08-11 437.500000 180.130005 30.200001 21.500000 3080.669922 126.750000 1374.390015 1480.319946 3333.689941

2159 rows × 10 columns

In [ ]:
# Read the stocks volume data
stock_vol_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/stock_volume.csv')
stock_vol_df
Out[ ]:
Date AAPL BA T MGM AMZN IBM TSLA GOOG sp500
0 2012-01-12 53146800 3934500 26511100 17891100 5385800 6881000 729300 3764400 4019890000
1 2012-01-13 56505400 4641100 22096800 16621800 4753500 5279200 5500400 4631800 3692370000
2 2012-01-17 60724300 3700100 23500200 15480800 5644500 6003400 4651600 3832800 4010490000
3 2012-01-18 69197800 4189500 22015000 18387600 7473500 4600600 1260200 5544000 4096160000
4 2012-01-19 65434600 5397300 25524000 14022900 7096000 8567200 1246300 12657800 4465890000
... ... ... ... ... ... ... ... ... ... ...
2154 2020-08-05 30498000 46551000 22991700 18914200 3930000 3675400 4978000 1979500 4732220000
2155 2020-08-06 50607200 32921600 21908700 35867700 3940600 3417100 5992300 1995400 4267490000
2156 2020-08-07 49453300 19301600 30398500 34530300 3929600 3651000 8883500 1576600 4104860000
2157 2020-08-10 53100900 35857700 35514400 71219700 3167300 3968300 7522300 1289300 4318570000
2158 2020-08-11 46871100 60966900 30978300 34357900 3706600 4998500 8356000 1452000 5087650000

2159 rows × 10 columns

In [ ]:
# Sort the data based on Date
stock_price_df = stock_price_df.sort_values(by = ['Date'])
stock_price_df
Out[ ]:
Date AAPL BA T MGM AMZN IBM TSLA GOOG sp500
0 2012-01-12 60.198570 75.510002 30.120001 12.130000 175.929993 180.550003 28.250000 313.644379 1295.500000
1 2012-01-13 59.972858 74.599998 30.070000 12.350000 178.419998 179.160004 22.790001 311.328064 1289.089966
2 2012-01-17 60.671429 75.239998 30.250000 12.250000 181.660004 180.000000 26.600000 313.116364 1293.670044
3 2012-01-18 61.301430 75.059998 30.330000 12.730000 189.440002 181.070007 26.809999 315.273285 1308.040039
4 2012-01-19 61.107143 75.559998 30.420000 12.800000 194.449997 180.520004 26.760000 318.590851 1314.500000
... ... ... ... ... ... ... ... ... ... ...
2154 2020-08-05 440.250000 174.279999 29.850000 16.719999 3205.030029 125.449997 1485.020020 1473.609985 3327.770020
2155 2020-08-06 455.609985 172.199997 29.840000 18.459999 3225.000000 126.120003 1489.579956 1500.099976 3349.159912
2156 2020-08-07 444.450012 170.020004 30.020000 19.030001 3167.459961 124.959999 1452.709961 1494.489990 3351.280029
2157 2020-08-10 450.910004 179.410004 30.200001 21.650000 3148.159912 127.110001 1418.569946 1496.099976 3360.469971
2158 2020-08-11 437.500000 180.130005 30.200001 21.500000 3080.669922 126.750000 1374.390015 1480.319946 3333.689941

2159 rows × 10 columns

In [ ]:
# Sort the volume data based on Date
stock_vol_df = stock_vol_df.sort_values(by = ['Date'])
stock_vol_df
Out[ ]:
Date AAPL BA T MGM AMZN IBM TSLA GOOG sp500
0 2012-01-12 53146800 3934500 26511100 17891100 5385800 6881000 729300 3764400 4019890000
1 2012-01-13 56505400 4641100 22096800 16621800 4753500 5279200 5500400 4631800 3692370000
2 2012-01-17 60724300 3700100 23500200 15480800 5644500 6003400 4651600 3832800 4010490000
3 2012-01-18 69197800 4189500 22015000 18387600 7473500 4600600 1260200 5544000 4096160000
4 2012-01-19 65434600 5397300 25524000 14022900 7096000 8567200 1246300 12657800 4465890000
... ... ... ... ... ... ... ... ... ... ...
2154 2020-08-05 30498000 46551000 22991700 18914200 3930000 3675400 4978000 1979500 4732220000
2155 2020-08-06 50607200 32921600 21908700 35867700 3940600 3417100 5992300 1995400 4267490000
2156 2020-08-07 49453300 19301600 30398500 34530300 3929600 3651000 8883500 1576600 4104860000
2157 2020-08-10 53100900 35857700 35514400 71219700 3167300 3968300 7522300 1289300 4318570000
2158 2020-08-11 46871100 60966900 30978300 34357900 3706600 4998500 8356000 1452000 5087650000

2159 rows × 10 columns

In [ ]:
# Check if Null values exist in stock prices data
stock_price_df.isnull().sum()
Out[ ]:
Date     0
AAPL     0
BA       0
T        0
MGM      0
AMZN     0
IBM      0
TSLA     0
GOOG     0
sp500    0
dtype: int64
In [ ]:
# Check if Null values exist in stocks volume data
stock_vol_df.isnull().sum()
Out[ ]:
Date     0
AAPL     0
BA       0
T        0
MGM      0
AMZN     0
IBM      0
TSLA     0
GOOG     0
sp500    0
dtype: int64
In [ ]:
# Get stock prices dataframe info
stock_price_df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2159 entries, 0 to 2158
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    2159 non-null   object 
 1   AAPL    2159 non-null   float64
 2   BA      2159 non-null   float64
 3   T       2159 non-null   float64
 4   MGM     2159 non-null   float64
 5   AMZN    2159 non-null   float64
 6   IBM     2159 non-null   float64
 7   TSLA    2159 non-null   float64
 8   GOOG    2159 non-null   float64
 9   sp500   2159 non-null   float64
dtypes: float64(9), object(1)
memory usage: 185.5+ KB
In [ ]:
# Get stock volume dataframe info
stock_vol_df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2159 entries, 0 to 2158
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    2159 non-null   object
 1   AAPL    2159 non-null   int64 
 2   BA      2159 non-null   int64 
 3   T       2159 non-null   int64 
 4   MGM     2159 non-null   int64 
 5   AMZN    2159 non-null   int64 
 6   IBM     2159 non-null   int64 
 7   TSLA    2159 non-null   int64 
 8   GOOG    2159 non-null   int64 
 9   sp500   2159 non-null   int64 
dtypes: int64(9), object(1)
memory usage: 185.5+ KB
In [ ]:
# The average trading volume for Apple stock:
stock_vol_df['AAPL'].mean()
Out[ ]:
58203317.41547012
In [ ]:
stock_vol_df['GOOG'].mean()
Out[ ]:
2498238.2584529873
In [ ]:
# The maximum trading volume for sp500:
stock_vol_df['sp500'].max()
Out[ ]:
9044690000
In [ ]:
# The maximum price of Tesla Stock:
stock_price_df['TSLA'].max()
Out[ ]:
1643.0
In [ ]:
# The average stock price of the S&P500 over this time period:
stock_price_df['sp500'].mean()
Out[ ]:
2218.7495540592877
In [ ]:
 

EXPLORATORY DATA ANALYSIS AND VISUALIZATION

Normalize the price and volume series so different stocks can be compared on the same scale, then explore them with interactive plots.

In [ ]:
# Function to normalize stock prices based on their initial price

def normalize(df):
  """Normalize every non-Date column relative to its first row.

  Each column after the first (assumed to be 'Date') is divided by its
  first value, so every series starts at 1.0 and can be compared on the
  same scale.

  Parameters
  ----------
  df : pd.DataFrame
      First column is 'Date'; remaining columns are numeric series.

  Returns
  -------
  pd.DataFrame
      A copy of df with each numeric column scaled by its first value.
  """
  x = df.copy()
  for col in x.columns[1:]:
    # Use .iloc[0] (positional) rather than [0] (label-based): the original
    # [0] lookup fails whenever the index does not contain the label 0,
    # e.g. after filtering or re-sorting rows.
    x[col] = x[col] / x[col].iloc[0]
  return x
In [ ]:
# Function to plot interactive plots using Plotly Express

def interactive_plot(df, title):
  """Draw one interactive Plotly line per non-Date column of df.

  The first column is assumed to be 'Date' and used as the x-axis;
  every remaining column becomes its own named scatter trace.
  """
  fig = px.line(title=title)
  dates = df['Date']
  for series_name in list(df.columns)[1:]:
    fig.add_scatter(x=dates, y=df[series_name], name=series_name)
  fig.show()
In [ ]:
# Plot interactive chart for Normalized stocks data
interactive_plot(normalize(stock_price_df), 'Normalized Stock Price Data')
In [ ]:
# Plot interactive chart for Non-normalized stocks data
interactive_plot(stock_price_df, 'Non-normalized Stock Prices')
In [ ]:
wo_sp500_price = stock_price_df.drop('sp500', axis=1)
interactive_plot(wo_sp500_price, 'Stock Price Data Without SP500')
In [ ]:
# Plot interactive chart for stock volume data
interactive_plot(stock_vol_df, 'Stock Volume Data')
In [ ]:
# Plot interactive chart for stocks data without SP500 
wo_sp500_vol = stock_vol_df.drop('sp500', axis=1)
interactive_plot(wo_sp500_vol, 'Stock Volume Data Without SP500')
In [ ]:
# Plot interactive chart for normalized stock volume data
interactive_plot(normalize(stock_vol_df), 'Normalized Stock Volume Data')

PREPARE THE DATA BEFORE TRAINING THE AI/ML MODEL

In [ ]:
# Function to concatenate the date, stock price, and volume in one dataframe
def individual_stock(price_df, vol_df, name):
  """Combine the Date, closing price and volume for one ticker.

  Parameters
  ----------
  price_df : pd.DataFrame with a 'Date' column and one price column per ticker
  vol_df : pd.DataFrame with a 'Date' column and one volume column per ticker
  name : str, ticker symbol present in both frames

  Returns
  -------
  pd.DataFrame with columns ['Date', 'Close', 'Volume'].
  """
  combined = {
      'Date': price_df['Date'],
      'Close': price_df[name],
      'Volume': vol_df[name],
  }
  return pd.DataFrame(combined)
In [ ]:
# Function to return the input/output (target) data for AI/ML Model
# Note that our goal is to predict the future stock price 
# Target stock price today will be tomorrow's price 

def trading_window(data, n=1):
  """Add a 'target' column: the closing price n rows ahead.

  The model's goal is to predict the future price, so today's target is
  the Close value n trading days later. The last n rows get NaN targets
  and should be dropped by the caller.

  Parameters
  ----------
  data : pd.DataFrame with a 'Close' column.
  n : int, default 1
      Prediction horizon in rows (was a hard-coded local before).

  Returns
  -------
  pd.DataFrame
      A copy of `data` with the extra 'target' column; the input frame
      is left unmodified (the original mutated it in place).
  """
  out = data.copy()
  out['target'] = out['Close'].shift(-n)
  return out
  
In [ ]:
# Let's test the functions and get individual stock prices and volumes for AAPL
price_volume_df = individual_stock(stock_price_df, stock_vol_df, 'AAPL')
price_volume_df
Out[ ]:
Date Close Volume
0 2012-01-12 60.198570 53146800
1 2012-01-13 59.972858 56505400
2 2012-01-17 60.671429 60724300
3 2012-01-18 61.301430 69197800
4 2012-01-19 61.107143 65434600
... ... ... ...
2154 2020-08-05 440.250000 30498000
2155 2020-08-06 455.609985 50607200
2156 2020-08-07 444.450012 49453300
2157 2020-08-10 450.910004 53100900
2158 2020-08-11 437.500000 46871100

2159 rows × 3 columns

In [ ]:
price_volume_target_df = trading_window(price_volume_df)
price_volume_target_df
Out[ ]:
Date Close Volume target
0 2012-01-12 60.198570 53146800 59.972858
1 2012-01-13 59.972858 56505400 60.671429
2 2012-01-17 60.671429 60724300 61.301430
3 2012-01-18 61.301430 69197800 61.107143
4 2012-01-19 61.107143 65434600 60.042858
... ... ... ... ...
2154 2020-08-05 440.250000 30498000 455.609985
2155 2020-08-06 455.609985 50607200 444.450012
2156 2020-08-07 444.450012 49453300 450.910004
2157 2020-08-10 450.910004 53100900 437.500000
2158 2020-08-11 437.500000 46871100 NaN

2159 rows × 4 columns

In [ ]:
# Remove the last row as it will be a null value
price_volume_target_df = price_volume_target_df[:-1]
price_volume_target_df
Out[ ]:
Date Close Volume target
0 2012-01-12 60.198570 53146800 59.972858
1 2012-01-13 59.972858 56505400 60.671429
2 2012-01-17 60.671429 60724300 61.301430
3 2012-01-18 61.301430 69197800 61.107143
4 2012-01-19 61.107143 65434600 60.042858
... ... ... ... ...
2153 2020-08-04 438.660004 43267900 440.250000
2154 2020-08-05 440.250000 30498000 455.609985
2155 2020-08-06 455.609985 50607200 444.450012
2156 2020-08-07 444.450012 49453300 450.910004
2157 2020-08-10 450.910004 53100900 437.500000

2158 rows × 4 columns

In [ ]:
# Scale the data
from sklearn.preprocessing import MinMaxScaler

sc = MinMaxScaler(feature_range = (0,1))

# Since we want to scale everything except the date column, we just drop it!
price_volume_target_scaled_df = sc.fit_transform(price_volume_target_df.drop(columns = ['Date']))
price_volume_target_scaled_df
Out[ ]:
array([[0.01102638, 0.11442624, 0.01046185],
       [0.01046185, 0.12362365, 0.01220906],
       [0.01220906, 0.13517696, 0.01378478],
       ...,
       [1.        , 0.10747163, 0.97208751],
       [0.97208751, 0.10431171, 0.98824476],
       [0.98824476, 0.11430054, 0.95470465]])
In [ ]:
price_volume_target_scaled_df.shape
Out[ ]:
(2158, 3)
In [ ]:
 
In [ ]:
# Create Feature and Target
X = price_volume_target_scaled_df[:, :2]     #features is everything except the last 'Target' column
y = price_volume_target_scaled_df[:, 2:]
In [ ]:
 
In [ ]:
 
In [ ]:
# Splitting the data this way, since order is important in time-series
# Note that we did not use train_test_split with its default settings, since it shuffles the data
X
Out[ ]:
array([[0.01102638, 0.11442624],
       [0.01046185, 0.12362365],
       [0.01220906, 0.13517696],
       ...,
       [1.        , 0.10747163],
       [0.97208751, 0.10431171],
       [0.98824476, 0.11430054]])
In [ ]:
y
Out[ ]:
array([[0.01046185],
       [0.01220906],
       [0.01378478],
       ...,
       [0.97208751],
       [0.98824476],
       [0.95470465]])
In [ ]:
X.shape, y.shape
Out[ ]:
((2158, 2), (2158, 1))
In [ ]:
# Split the data for 65% Training,  35% Testing

split = int(0.65 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]
In [ ]:
X_train.shape, y_train.shape
Out[ ]:
((1402, 2), (1402, 1))
In [ ]:
X_test.shape, y_test.shape
Out[ ]:
((756, 2), (756, 1))
In [ ]:
 
In [ ]:
 
In [ ]:
# Define a data plotting function

def show_plot(data, title):
  """Plot `data` as thick lines on a wide (13x5) figure with a grid.

  Parameters
  ----------
  data : array-like accepted by matplotlib's plot (each column is a line).
  title : str, figure title.
  """
  # Explicit Axes interface instead of the implicit pyplot state machine:
  # same rendered output, but the figure/axes are addressable objects.
  fig, ax = plt.subplots(figsize=(13, 5))
  ax.plot(data, linewidth=3)
  ax.set_title(title)
  ax.grid(True)

show_plot(X_train, 'Training Dataset')
show_plot(X_test, 'Testing Dataset')

TEST PIPELINE USING THE APPLE (AAPL) DATASET

In [ ]:
def scale_split_dateset(df, split = 0.65):
  """Scale all non-Date columns to [0, 1] and split into ordered train/test sets.

  The last column (after dropping 'Date') is treated as the target y; all
  preceding columns are the features X. The split is positional, with no
  shuffling, so the time-series ordering is preserved. Both partitions are
  also plotted via show_plot.

  NOTE: the function name keeps the original 'dateset' typo so existing
  callers keep working.

  Parameters
  ----------
  df : pd.DataFrame with a 'Date' column, feature columns, and the target last.
  split : float, default 0.65 — fraction of rows used for training.

  Returns
  -------
  (X_train, y_train, X_test, y_test) as numpy arrays.
  """
  sc = MinMaxScaler(feature_range = (0, 1))

  # Scale everything except the non-numeric Date column.
  scaled = sc.fit_transform(df.drop(columns = ['Date']))

  # Generalized from the hard-coded [:, :2] / [:, 2:]: features are every
  # column but the last, the last column is the target. Identical for the
  # 3-column (Close, Volume, target) frames used in this notebook.
  X = scaled[:, :-1]
  y = scaled[:, -1:]

  split_idx = int(split * len(X))
  X_train, y_train = X[:split_idx], y[:split_idx]
  X_test, y_test = X[split_idx:], y[split_idx:]

  show_plot(X_train, 'Training Dataset')
  show_plot(X_test, 'Testing Dataset')

  return X_train, y_train, X_test, y_test
In [ ]:
 
In [ ]:
# Make dataset for AAPL stock
# Create a Price-Vol-Target dataframe dropping the last row since it has an 'NaN'
aapl_target_df = trading_window(individual_stock(stock_price_df, stock_vol_df, 'AAPL'))[:-1]

# Scale and Split dataframe in to Train/Test datasets for training
X_train, y_train, X_test, y_test = scale_split_dateset(aapl_target_df)

print('Training shape: {},{}  and   Testing shape: {},{}'.format(X_train.shape, y_train.shape, X_test.shape, y_test.shape ))
Training shape: (1402, 2),(1402, 1)  and   Testing shape: (756, 2),(756, 1)
In [ ]:
 

BUILD AND TRAIN A RIDGE LINEAR REGRESSION MODEL

In [ ]:
from sklearn.linear_model import Ridge
# Note that Ridge regression performs linear least squares with L2 regularization.
# Create and train the Ridge Linear Regression  Model
lr = Ridge()

lr.fit(X_train, y_train)
Out[ ]:
Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)
In [ ]:
# Test the model and calculate its accuracy 
lr_accuracy = lr.score(X_test, y_test)
print('My Ridge Regression Score is: {}'.format(lr_accuracy))
My Ridge Regression Score is: 0.7950028030821767
In [ ]:
# Make Prediction
predicted_prices = lr.predict(X)
predicted_prices
Out[ ]:
array([[0.03466412],
       [0.03374627],
       [0.03451936],
       ...,
       [0.81048342],
       [0.78876033],
       [0.80091324]])
In [ ]:
# Append the predicted values into a list
predicted_list = []
for i in predicted_prices:
  predicted_list.append(i[0])

len(predicted_list)
Out[ ]:
2158
In [ ]:
# Append the close values to the list

true_list = []

for i in price_volume_target_scaled_df:
  true_list.append(i[0])

len(true_list)
Out[ ]:
2158
In [ ]:
price_volume_target_df[['Date']]
Out[ ]:
Date
0 2012-01-12
1 2012-01-13
2 2012-01-17
3 2012-01-18
4 2012-01-19
... ...
2153 2020-08-04
2154 2020-08-05
2155 2020-08-06
2156 2020-08-07
2157 2020-08-10

2158 rows × 1 columns

In [ ]:
# Create a dataframe based on the dates in the individual stock data
df_predicted = price_volume_target_df[['Date']]

# Add predicted_list, true_list columns
df_predicted['True $'] = true_list
df_predicted['Predicted $'] = predicted_list

df_predicted
Out[ ]:
Date True $ Predicted $
0 2012-01-12 0.011026 0.034664
1 2012-01-13 0.010462 0.033746
2 2012-01-17 0.012209 0.034519
3 2012-01-18 0.013785 0.034556
4 2012-01-19 0.013299 0.034707
... ... ... ...
2153 2020-08-04 0.957606 0.778280
2154 2020-08-05 0.961583 0.783205
2155 2020-08-06 1.000000 0.810483
2156 2020-08-07 0.972088 0.788760
2157 2020-08-10 0.988245 0.800913

2158 rows × 3 columns

In [ ]:
# Plot the results
interactive_plot(df_predicted, 'Ground Truth vs. Predicted Prices')

TRAIN AN LSTM TIME SERIES MODEL

In [ ]:
# Let's test the functions and get individual stock prices and volumes for AT&T ('T')
stk_price_volume_df = individual_stock(stock_price_df, stock_vol_df, 'T')
stk_price_volume_df
Out[ ]:
Date Close Volume
0 2012-01-12 30.120001 26511100
1 2012-01-13 30.070000 22096800
2 2012-01-17 30.250000 23500200
3 2012-01-18 30.330000 22015000
4 2012-01-19 30.420000 25524000
... ... ... ...
2154 2020-08-05 29.850000 22991700
2155 2020-08-06 29.840000 21908700
2156 2020-08-07 30.020000 30398500
2157 2020-08-10 30.200001 35514400
2158 2020-08-11 30.200001 30978300

2159 rows × 3 columns

In [ ]:
# Get the close and volume data as training data (Input)
training_data = stk_price_volume_df.iloc[:, 1:3].values       #drop Date and just get Close and Volume for training data
training_data
Out[ ]:
array([[3.0120001e+01, 2.6511100e+07],
       [3.0070000e+01, 2.2096800e+07],
       [3.0250000e+01, 2.3500200e+07],
       ...,
       [3.0020000e+01, 3.0398500e+07],
       [3.0200001e+01, 3.5514400e+07],
       [3.0200001e+01, 3.0978300e+07]])
In [ ]:
# Normalize the data
sc = MinMaxScaler(feature_range= (0,1))
training_set_scaled = sc.fit_transform(training_data)
training_set_scaled
Out[ ]:
array([[0.20059885, 0.10439203],
       [0.19760478, 0.0809392 ],
       [0.20838322, 0.08839535],
       ...,
       [0.19461077, 0.12504549],
       [0.20538927, 0.15222588],
       [0.20538927, 0.12812592]])
In [ ]:
# Create the training and testing data, training data contains present day and previous day values
X = []
y = []

for i in range(1, len(stk_price_volume_df)):
  X.append(training_set_scaled[i-1:i, 0])
  y.append(training_set_scaled[i, 0])
  
In [ ]:
X
In [ ]:
# Convert the data into array format
X = np.asarray(X)
y = np.asarray(y)
In [ ]:
X
Out[ ]:
array([[0.20059885],
       [0.19760478],
       [0.20838322],
       ...,
       [0.18383232],
       [0.19461077],
       [0.20538927]])
In [ ]:
# Split the data 70/30 for train-test datasets

split = int(0.7 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

print('Training shape: {},{}  and   Testing shape: {},{}'.format(X_train.shape, y_train.shape, X_test.shape, y_test.shape ))
Training shape: (1510, 1),(1510,)  and   Testing shape: (648, 1),(648,)
In [ ]:
 
In [ ]:
# Reshape the 1D arrays to 3D arrays to feed in the LSTM model
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

print('Now my Training shape: {},{}  and   Testing shape: {},{}'.format(X_train.shape, y_train.shape, X_test.shape, y_test.shape ))
Now my Training shape: (1510, 1, 1),(1510,)  and   Testing shape: (648, 1, 1),(648,)
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
# Create a basic LSTM model using Keras

inputs = keras.layers.Input(shape = (X_train.shape[1], X_train.shape[2] ))

x = keras.layers.LSTM(150, return_sequences=True)(inputs)
x = keras.layers.LSTM(150, return_sequences=True)(x)
x = keras.layers.LSTM(150, return_sequences=True)(x)

outputs = keras.layers.Dense(1, activation='linear')(x)    #Use 'linear' since we have a continuous stock price


model = keras.Model(inputs = inputs, outputs = outputs)
model.compile(optimizer = 'adam', loss = 'mse' )          #Use 'adam' is the most common
model.summary()
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 1, 1)]            0         
_________________________________________________________________
lstm (LSTM)                  (None, 1, 150)            91200     
_________________________________________________________________
lstm_1 (LSTM)                (None, 1, 150)            180600    
_________________________________________________________________
lstm_2 (LSTM)                (None, 1, 150)            180600    
_________________________________________________________________
dense (Dense)                (None, 1, 1)              151       
=================================================================
Total params: 452,551
Trainable params: 452,551
Non-trainable params: 0
_________________________________________________________________
In [ ]:
# Train the model
history = model.fit(X_train, y_train, epochs = 30, batch_size= 32, validation_split=0.2)
Epoch 1/30
38/38 [==============================] - 6s 42ms/step - loss: 0.2116 - val_loss: 0.0243
Epoch 2/30
38/38 [==============================] - 0s 10ms/step - loss: 0.0122 - val_loss: 0.0195
Epoch 3/30
38/38 [==============================] - 0s 10ms/step - loss: 0.0066 - val_loss: 0.0060
Epoch 4/30
38/38 [==============================] - 0s 10ms/step - loss: 0.0023 - val_loss: 7.3511e-04
Epoch 5/30
38/38 [==============================] - 0s 10ms/step - loss: 6.5934e-04 - val_loss: 7.9627e-04
Epoch 6/30
38/38 [==============================] - 0s 10ms/step - loss: 5.7473e-04 - val_loss: 7.7430e-04
Epoch 7/30
38/38 [==============================] - 0s 10ms/step - loss: 5.6764e-04 - val_loss: 8.0796e-04
Epoch 8/30
38/38 [==============================] - 0s 10ms/step - loss: 5.5883e-04 - val_loss: 7.6378e-04
Epoch 9/30
38/38 [==============================] - 0s 10ms/step - loss: 4.5710e-04 - val_loss: 7.5683e-04
Epoch 10/30
38/38 [==============================] - 1s 16ms/step - loss: 5.1990e-04 - val_loss: 7.9062e-04
Epoch 11/30
38/38 [==============================] - 0s 10ms/step - loss: 4.6243e-04 - val_loss: 7.6305e-04
Epoch 12/30
38/38 [==============================] - 0s 10ms/step - loss: 4.8038e-04 - val_loss: 7.0737e-04
Epoch 13/30
38/38 [==============================] - 0s 10ms/step - loss: 4.8505e-04 - val_loss: 7.7031e-04
Epoch 14/30
38/38 [==============================] - 0s 10ms/step - loss: 4.5216e-04 - val_loss: 9.3147e-04
Epoch 15/30
38/38 [==============================] - 0s 10ms/step - loss: 5.0887e-04 - val_loss: 7.4350e-04
Epoch 16/30
38/38 [==============================] - 0s 10ms/step - loss: 4.3376e-04 - val_loss: 6.9802e-04
Epoch 17/30
38/38 [==============================] - 0s 9ms/step - loss: 4.7306e-04 - val_loss: 7.2399e-04
Epoch 18/30
38/38 [==============================] - 0s 10ms/step - loss: 4.7126e-04 - val_loss: 7.5140e-04
Epoch 19/30
38/38 [==============================] - 0s 10ms/step - loss: 4.2418e-04 - val_loss: 9.4845e-04
Epoch 20/30
38/38 [==============================] - 0s 10ms/step - loss: 4.7405e-04 - val_loss: 6.9012e-04
Epoch 21/30
38/38 [==============================] - 0s 10ms/step - loss: 4.2051e-04 - val_loss: 6.7404e-04
Epoch 22/30
38/38 [==============================] - 0s 10ms/step - loss: 4.5406e-04 - val_loss: 7.3954e-04
Epoch 23/30
38/38 [==============================] - 0s 10ms/step - loss: 3.9749e-04 - val_loss: 6.8164e-04
Epoch 24/30
38/38 [==============================] - 0s 9ms/step - loss: 4.2263e-04 - val_loss: 7.0990e-04
Epoch 25/30
38/38 [==============================] - 0s 10ms/step - loss: 3.9814e-04 - val_loss: 8.4057e-04
Epoch 26/30
38/38 [==============================] - 0s 10ms/step - loss: 4.7159e-04 - val_loss: 6.3930e-04
Epoch 27/30
38/38 [==============================] - 0s 10ms/step - loss: 4.0980e-04 - val_loss: 6.7106e-04
Epoch 28/30
38/38 [==============================] - 0s 10ms/step - loss: 4.1635e-04 - val_loss: 6.7118e-04
Epoch 29/30
38/38 [==============================] - 0s 10ms/step - loss: 4.0324e-04 - val_loss: 6.3859e-04
Epoch 30/30
38/38 [==============================] - 0s 10ms/step - loss: 4.5773e-04 - val_loss: 6.7544e-04
In [ ]:
# Make prediction
predicted = model.predict(X)
predicted
Out[ ]:
array([[[0.21291399]],

       [[0.21006034]],

       [[0.22033952]],

       ...,

       [[0.19695124]],

       [[0.20720808]],

       [[0.2174825 ]]], dtype=float32)
In [ ]:
# Append the predicted values to a list
test_predicted = []

for i in predicted:
  test_predicted.append(i[0][0])
In [ ]:
#test_predicted
In [ ]:
# Now, build a dataframe with date and SCALED data for the Close and Predicted price
df_predicted = price_volume_df[1:][['Date']]
df_predicted
Out[ ]:
Date
1 2012-01-13
2 2012-01-17
3 2012-01-18
4 2012-01-19
5 2012-01-20
... ...
2154 2020-08-05
2155 2020-08-06
2156 2020-08-07
2157 2020-08-10
2158 2020-08-11

2158 rows × 1 columns

In [ ]:
training_set_scaled[0]
Out[ ]:
array([0.20059885, 0.10439203])
In [ ]:
# Now, add the SCALED Closing price to this dataframe

close = []
for i in training_set_scaled:
  close.append(i[0])

df_predicted['Close'] = close[1:]
df_predicted
Out[ ]:
Date Close
1 2012-01-13 0.197605
2 2012-01-17 0.208383
3 2012-01-18 0.213174
4 2012-01-19 0.218563
5 2012-01-20 0.223952
... ... ...
2154 2020-08-05 0.184431
2155 2020-08-06 0.183832
2156 2020-08-07 0.194611
2157 2020-08-10 0.205389
2158 2020-08-11 0.205389

2158 rows × 2 columns

In [ ]:
 
In [ ]:
# Now, add my predictions to this dataframe

df_predicted['Predictions'] = test_predicted
df_predicted
Out[ ]:
Date Close Predictions
1 2012-01-13 0.197605 0.212914
2 2012-01-17 0.208383 0.210060
3 2012-01-18 0.213174 0.220340
4 2012-01-19 0.218563 0.224914
5 2012-01-20 0.223952 0.230064
... ... ... ...
2154 2020-08-05 0.184431 0.206638
2155 2020-08-06 0.183832 0.197521
2156 2020-08-07 0.194611 0.196951
2157 2020-08-10 0.205389 0.207208
2158 2020-08-11 0.205389 0.217483

2158 rows × 3 columns

In [ ]:
# Now, Show the interactive plot of the true and predicted prices

interactive_plot(df_predicted, 'Original Price vs. LSTM Model Predictions')
In [ ]:
 

BUILD FUNCTIONS TO BRING IT TOGETHER

In [ ]:
def make_training_data(stk_price_volume_df, split_ratio = 0.80):
  """Build scaled, lagged LSTM train/test arrays from a price/volume frame.

  Features are the previous day's scaled closing price; the label is the
  current day's scaled closing price. The split is positional (no
  shuffling) to preserve the time-series ordering, and the feature arrays
  are reshaped to the 3D (samples, timesteps, features) layout Keras
  LSTMs expect.

  Parameters
  ----------
  stk_price_volume_df : pd.DataFrame with columns [Date, Close, Volume].
  split_ratio : float, default 0.80 — fraction of samples used for training.

  Returns
  -------
  (X, X_train, y_train, X_test, y_test, training_set_scaled)
      X is the full (unsplit, 2D) feature array; training_set_scaled is
      the full scaled (Close, Volume) matrix, used downstream for plotting.
  """
  # Drop Date and keep just Close and Volume as the raw training data.
  training_data = stk_price_volume_df.iloc[:, 1:3].values

  # Normalize both columns to [0, 1].
  sc = MinMaxScaler(feature_range= (0,1))
  training_set_scaled = sc.fit_transform(training_data)

  # Build features (previous day's scaled close) and labels (current
  # day's scaled close); start at row 1 so every sample has a yesterday.
  X = []
  y = []
  for i in range(1, len(stk_price_volume_df)):
    X.append(training_set_scaled[i-1:i, 0])
    y.append(training_set_scaled[i, 0])

  X = np.asarray(X)
  y = np.asarray(y)

  # Positional split point for the ordered train/test partition.
  split_idx = int(split_ratio * len(X))
  X_train, y_train = X[:split_idx], y[:split_idx]
  X_test, y_test = X[split_idx:], y[split_idx:]

  # Reshape 2D feature arrays to 3D (samples, timesteps, features) for LSTM.
  X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
  X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

  return X, X_train, y_train, X_test, y_test, training_set_scaled
In [ ]:
# Create LSTM model using Dropout

def build_model(X_train, num_node = 100, drop_out = 0.2):
  """Build and compile a stacked 3-layer LSTM regression model.

  Parameters
  ----------
  X_train : 3D array; only its (timesteps, features) trailing shape is used.
  num_node : int, default 100 — units per LSTM layer.
  drop_out : float, default 0.2 — dropout rate between LSTM layers.

  Returns
  -------
  A compiled keras.Model (adam optimizer, MSE loss); summary is printed.
  """
  inputs = keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2]))

  x = inputs
  for layer_idx in range(3):
    x = keras.layers.LSTM(num_node, return_sequences=True)(x)
    if layer_idx < 2:
      # Dropout between LSTM layers only — none after the final one.
      x = keras.layers.Dropout(drop_out)(x)

  # Linear activation: the (scaled) stock price target is continuous.
  outputs = keras.layers.Dense(1, activation='linear')(x)

  model = keras.Model(inputs=inputs, outputs=outputs)
  model.compile(optimizer='adam', loss='mse')
  model.summary()

  return model
In [ ]:
def make_predictions(model, X, stock):
  """Run the model over X and flatten its predictions to a plain list.

  The model returns one (1, 1)-shaped prediction per sample; this pulls
  out the scalar from each. `stock` is accepted for interface
  compatibility with the other pipeline helpers but is not used here.
  """
  predicted = model.predict(X)
  return [sample[0][0] for sample in predicted]


 
In [ ]:
def show_true_and_predicted(stock):
  """Build a Date/Close/Predictions frame of SCALED values for `stock`,
  print it, and draw the interactive true-vs-predicted plot.

  NOTE(review): this reads several module-level globals set by earlier
  cells -- `price_volume_df`, `training_set_scaled`, `test_predicted`,
  and `interactive_plot` -- so it only works when called right after the
  training loop populates them.
  """
  # Dates only, dropping row 0 (it has no previous-day feature, so no
  # prediction exists for it).
  df_predicted = price_volume_df[1:][['Date']]

  # True scaled closing prices, aligned to the same one-row offset.
  df_predicted['Close'] = [row[0] for row in training_set_scaled][1:]

  # Model output produced by make_predictions (global).
  df_predicted['Predictions'] = test_predicted

  print('')
  print('The final Original & Predicted dataframe for stock {}:'.format(stock))
  print(df_predicted)

  # Interactive overlay of the true and predicted price series.
  interactive_plot(df_predicted, 'Original {} Price vs. LSTM Model Predictions'.format(stock))
In [ ]:
stock_price_df
Out[ ]:
Date AAPL BA T MGM AMZN IBM TSLA GOOG sp500
0 2012-01-12 60.198570 75.510002 30.120001 12.130000 175.929993 180.550003 28.250000 313.644379 1295.500000
1 2012-01-13 59.972858 74.599998 30.070000 12.350000 178.419998 179.160004 22.790001 311.328064 1289.089966
2 2012-01-17 60.671429 75.239998 30.250000 12.250000 181.660004 180.000000 26.600000 313.116364 1293.670044
3 2012-01-18 61.301430 75.059998 30.330000 12.730000 189.440002 181.070007 26.809999 315.273285 1308.040039
4 2012-01-19 61.107143 75.559998 30.420000 12.800000 194.449997 180.520004 26.760000 318.590851 1314.500000
... ... ... ... ... ... ... ... ... ... ...
2154 2020-08-05 440.250000 174.279999 29.850000 16.719999 3205.030029 125.449997 1485.020020 1473.609985 3327.770020
2155 2020-08-06 455.609985 172.199997 29.840000 18.459999 3225.000000 126.120003 1489.579956 1500.099976 3349.159912
2156 2020-08-07 444.450012 170.020004 30.020000 19.030001 3167.459961 124.959999 1452.709961 1494.489990 3351.280029
2157 2020-08-10 450.910004 179.410004 30.200001 21.650000 3148.159912 127.110001 1418.569946 1496.099976 3360.469971
2158 2020-08-11 437.500000 180.130005 30.200001 21.500000 3080.669922 126.750000 1374.390015 1480.319946 3333.689941

2159 rows × 10 columns

In [ ]:
stock_vol_df
Out[ ]:
Date AAPL BA T MGM AMZN IBM TSLA GOOG sp500
0 2012-01-12 53146800 3934500 26511100 17891100 5385800 6881000 729300 3764400 4019890000
1 2012-01-13 56505400 4641100 22096800 16621800 4753500 5279200 5500400 4631800 3692370000
2 2012-01-17 60724300 3700100 23500200 15480800 5644500 6003400 4651600 3832800 4010490000
3 2012-01-18 69197800 4189500 22015000 18387600 7473500 4600600 1260200 5544000 4096160000
4 2012-01-19 65434600 5397300 25524000 14022900 7096000 8567200 1246300 12657800 4465890000
... ... ... ... ... ... ... ... ... ... ...
2154 2020-08-05 30498000 46551000 22991700 18914200 3930000 3675400 4978000 1979500 4732220000
2155 2020-08-06 50607200 32921600 21908700 35867700 3940600 3417100 5992300 1995400 4267490000
2156 2020-08-07 49453300 19301600 30398500 34530300 3929600 3651000 8883500 1576600 4104860000
2157 2020-08-10 53100900 35857700 35514400 71219700 3167300 3968300 7522300 1289300 4318570000
2158 2020-08-11 46871100 60966900 30978300 34357900 3706600 4998500 8356000 1452000 5087650000

2159 rows × 10 columns

In [ ]:
# Train one LSTM per ticker (columns 6 onward: IBM, TSLA, GOOG, sp500)
# and visualize its predictions against the true scaled prices.
for ticker in stock_price_df.columns[6:]:

  print('')
  print('==============Making Predictions for stock {} ================'.format(str(ticker)))

  # Pull Date / Close / Volume for this single ticker.
  stk_price_volume_df = individual_stock(stock_price_df, stock_vol_df, ticker)

  # Build scaled features/labels and the 70/30 chronological split.
  # training_set_scaled stays global: show_true_and_predicted reads it.
  X, X_train, y_train, X_test, y_test, training_set_scaled = make_training_data(stk_price_volume_df, split_ratio = 0.70)
  print('My Training shape: {},{}  and   Testing shape: {},{}'.format(X_train.shape, y_train.shape, X_test.shape, y_test.shape ))

  # Fresh model for each ticker.
  model = build_model(X_train, num_node=300, drop_out=0.3)

  # Train, holding out 20% of the training set for validation.
  history = model.fit(X_train, y_train, epochs = 20, batch_size= 32, validation_split=0.2)

  # Predict over the FULL series (train + test), then plot truth vs
  # prediction. test_predicted stays global for show_true_and_predicted.
  test_predicted = make_predictions(model, X, ticker)

  show_true_and_predicted(ticker)
==============Making Predictions for stock IBM ================
My Training shape: (1510, 1, 1),(1510,)  and   Testing shape: (648, 1, 1),(648,)
Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_6 (InputLayer)         [(None, 1, 1)]            0         
_________________________________________________________________
lstm_15 (LSTM)               (None, 1, 300)            362400    
_________________________________________________________________
dropout (Dropout)            (None, 1, 300)            0         
_________________________________________________________________
lstm_16 (LSTM)               (None, 1, 300)            721200    
_________________________________________________________________
dropout_1 (Dropout)          (None, 1, 300)            0         
_________________________________________________________________
lstm_17 (LSTM)               (None, 1, 300)            721200    
_________________________________________________________________
dense_5 (Dense)              (None, 1, 1)              301       
=================================================================
Total params: 1,805,101
Trainable params: 1,805,101
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
38/38 [==============================] - 6s 53ms/step - loss: 0.2678 - val_loss: 0.0040
Epoch 2/20
38/38 [==============================] - 1s 21ms/step - loss: 0.0111 - val_loss: 0.0056
Epoch 3/20
38/38 [==============================] - 1s 22ms/step - loss: 0.0060 - val_loss: 0.0027
Epoch 4/20
38/38 [==============================] - 1s 21ms/step - loss: 0.0029 - val_loss: 4.2088e-04
Epoch 5/20
38/38 [==============================] - 1s 29ms/step - loss: 0.0019 - val_loss: 3.1823e-04
Epoch 6/20
38/38 [==============================] - 1s 21ms/step - loss: 0.0018 - val_loss: 2.2860e-04
Epoch 7/20
38/38 [==============================] - 1s 21ms/step - loss: 0.0015 - val_loss: 1.7572e-04
Epoch 8/20
38/38 [==============================] - 1s 22ms/step - loss: 0.0015 - val_loss: 1.8745e-04
Epoch 9/20
38/38 [==============================] - 1s 22ms/step - loss: 0.0015 - val_loss: 2.4496e-04
Epoch 10/20
38/38 [==============================] - 1s 22ms/step - loss: 0.0018 - val_loss: 1.9672e-04
Epoch 11/20
38/38 [==============================] - 1s 22ms/step - loss: 0.0015 - val_loss: 2.2262e-04
Epoch 12/20
38/38 [==============================] - 1s 22ms/step - loss: 0.0015 - val_loss: 1.9338e-04
Epoch 13/20
38/38 [==============================] - 1s 23ms/step - loss: 0.0014 - val_loss: 2.1356e-04
Epoch 14/20
38/38 [==============================] - 1s 23ms/step - loss: 0.0014 - val_loss: 1.8330e-04
Epoch 15/20
38/38 [==============================] - 1s 24ms/step - loss: 0.0011 - val_loss: 3.7001e-04
Epoch 16/20
38/38 [==============================] - 1s 23ms/step - loss: 0.0012 - val_loss: 1.6868e-04
Epoch 17/20
38/38 [==============================] - 1s 23ms/step - loss: 0.0011 - val_loss: 2.0803e-04
Epoch 18/20
38/38 [==============================] - 1s 23ms/step - loss: 9.7413e-04 - val_loss: 1.8264e-04
Epoch 19/20
38/38 [==============================] - 1s 24ms/step - loss: 0.0011 - val_loss: 2.3184e-04
Epoch 20/20
38/38 [==============================] - 1s 23ms/step - loss: 0.0011 - val_loss: 1.7109e-04

The final Original & Predicted dataframe for stock IBM:
            Date     Close  Predictions
1     2012-01-13  0.697265     0.709146
2     2012-01-17  0.704206     0.697764
3     2012-01-18  0.713046     0.704643
4     2012-01-19  0.708502     0.713403
5     2012-01-20  0.774601     0.708900
...          ...       ...          ...
2154  2020-08-05  0.253491     0.261555
2155  2020-08-06  0.259027     0.258381
2156  2020-08-07  0.249442     0.263835
2157  2020-08-10  0.267206     0.254393
2158  2020-08-11  0.264232     0.271896

[2158 rows x 3 columns]
==============Making Predictions for stock TSLA ================
My Training shape: (1510, 1, 1),(1510,)  and   Testing shape: (648, 1, 1),(648,)
Model: "model_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_7 (InputLayer)         [(None, 1, 1)]            0         
_________________________________________________________________
lstm_18 (LSTM)               (None, 1, 300)            362400    
_________________________________________________________________
dropout_2 (Dropout)          (None, 1, 300)            0         
_________________________________________________________________
lstm_19 (LSTM)               (None, 1, 300)            721200    
_________________________________________________________________
dropout_3 (Dropout)          (None, 1, 300)            0         
_________________________________________________________________
lstm_20 (LSTM)               (None, 1, 300)            721200    
_________________________________________________________________
dense_6 (Dense)              (None, 1, 1)              301       
=================================================================
Total params: 1,805,101
Trainable params: 1,805,101
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
38/38 [==============================] - 7s 52ms/step - loss: 0.0050 - val_loss: 0.0072
Epoch 2/20
38/38 [==============================] - 1s 22ms/step - loss: 0.0017 - val_loss: 1.6576e-04
Epoch 3/20
38/38 [==============================] - 1s 22ms/step - loss: 6.0512e-05 - val_loss: 1.7501e-05
Epoch 4/20
38/38 [==============================] - 1s 22ms/step - loss: 3.4509e-05 - val_loss: 1.9104e-05
Epoch 5/20
38/38 [==============================] - 1s 22ms/step - loss: 2.9642e-05 - val_loss: 4.7917e-05
Epoch 6/20
38/38 [==============================] - 1s 22ms/step - loss: 3.2978e-05 - val_loss: 1.7109e-05
Epoch 7/20
38/38 [==============================] - 1s 23ms/step - loss: 2.7902e-05 - val_loss: 1.7597e-05
Epoch 8/20
38/38 [==============================] - 1s 22ms/step - loss: 2.9755e-05 - val_loss: 5.8720e-05
Epoch 9/20
38/38 [==============================] - 1s 22ms/step - loss: 2.6335e-05 - val_loss: 1.7069e-05
Epoch 10/20
38/38 [==============================] - 1s 22ms/step - loss: 2.4096e-05 - val_loss: 1.7064e-05
Epoch 11/20
38/38 [==============================] - 1s 23ms/step - loss: 2.3978e-05 - val_loss: 1.7678e-05
Epoch 12/20
38/38 [==============================] - 1s 22ms/step - loss: 2.7619e-05 - val_loss: 1.8716e-05
Epoch 13/20
38/38 [==============================] - 1s 23ms/step - loss: 2.5628e-05 - val_loss: 4.5967e-05
Epoch 14/20
38/38 [==============================] - 1s 22ms/step - loss: 3.0441e-05 - val_loss: 1.7441e-05
Epoch 15/20
38/38 [==============================] - 1s 23ms/step - loss: 2.2627e-05 - val_loss: 1.9421e-05
Epoch 16/20
38/38 [==============================] - 1s 24ms/step - loss: 2.3590e-05 - val_loss: 2.6373e-05
Epoch 17/20
38/38 [==============================] - 1s 26ms/step - loss: 2.3372e-05 - val_loss: 1.8085e-05
Epoch 18/20
38/38 [==============================] - 1s 25ms/step - loss: 2.4684e-05 - val_loss: 2.1860e-05
Epoch 19/20
38/38 [==============================] - 1s 24ms/step - loss: 2.2940e-05 - val_loss: 2.3424e-05
Epoch 20/20
38/38 [==============================] - 1s 23ms/step - loss: 2.5441e-05 - val_loss: 2.0027e-05

The final Original & Predicted dataframe for stock TSLA:
            Date     Close  Predictions
1     2012-01-13  0.000000     0.005441
2     2012-01-17  0.002352     0.002068
3     2012-01-18  0.002481     0.004421
4     2012-01-19  0.002450     0.004551
5     2012-01-20  0.002352     0.004520
...          ...       ...          ...
2154  2020-08-05  0.902494     0.880168
2155  2020-08-06  0.905309     0.879048
2156  2020-08-07  0.882552     0.881628
2157  2020-08-10  0.861481     0.860731
2158  2020-08-11  0.834213     0.841304

[2158 rows x 3 columns]
==============Making Predictions for stock GOOG ================
My Training shape: (1510, 1, 1),(1510,)  and   Testing shape: (648, 1, 1),(648,)
Model: "model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_8 (InputLayer)         [(None, 1, 1)]            0         
_________________________________________________________________
lstm_21 (LSTM)               (None, 1, 300)            362400    
_________________________________________________________________
dropout_4 (Dropout)          (None, 1, 300)            0         
_________________________________________________________________
lstm_22 (LSTM)               (None, 1, 300)            721200    
_________________________________________________________________
dropout_5 (Dropout)          (None, 1, 300)            0         
_________________________________________________________________
lstm_23 (LSTM)               (None, 1, 300)            721200    
_________________________________________________________________
dense_7 (Dense)              (None, 1, 1)              301       
=================================================================
Total params: 1,805,101
Trainable params: 1,805,101
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
38/38 [==============================] - 7s 52ms/step - loss: 0.0276 - val_loss: 0.0256
Epoch 2/20
38/38 [==============================] - 1s 29ms/step - loss: 0.0020 - val_loss: 1.0606e-04
Epoch 3/20
38/38 [==============================] - 1s 23ms/step - loss: 1.7940e-04 - val_loss: 5.4454e-05
Epoch 4/20
38/38 [==============================] - 1s 22ms/step - loss: 1.5103e-04 - val_loss: 6.3845e-05
Epoch 5/20
38/38 [==============================] - 1s 22ms/step - loss: 1.5433e-04 - val_loss: 5.9028e-05
Epoch 6/20
38/38 [==============================] - 1s 22ms/step - loss: 1.1103e-04 - val_loss: 5.1548e-05
Epoch 7/20
38/38 [==============================] - 1s 22ms/step - loss: 1.1206e-04 - val_loss: 5.2823e-05
Epoch 8/20
38/38 [==============================] - 1s 22ms/step - loss: 1.3190e-04 - val_loss: 6.2150e-05
Epoch 9/20
38/38 [==============================] - 1s 22ms/step - loss: 1.1828e-04 - val_loss: 5.0911e-05
Epoch 10/20
38/38 [==============================] - 1s 23ms/step - loss: 1.0804e-04 - val_loss: 1.1897e-04
Epoch 11/20
38/38 [==============================] - 1s 21ms/step - loss: 1.0576e-04 - val_loss: 1.7950e-04
Epoch 12/20
38/38 [==============================] - 1s 22ms/step - loss: 1.2805e-04 - val_loss: 6.2064e-05
Epoch 13/20
38/38 [==============================] - 1s 22ms/step - loss: 1.0137e-04 - val_loss: 5.2790e-05
Epoch 14/20
38/38 [==============================] - 1s 22ms/step - loss: 9.9099e-05 - val_loss: 9.4420e-05
Epoch 15/20
38/38 [==============================] - 1s 22ms/step - loss: 1.0333e-04 - val_loss: 5.5776e-05
Epoch 16/20
38/38 [==============================] - 1s 23ms/step - loss: 1.1316e-04 - val_loss: 5.2919e-05
Epoch 17/20
38/38 [==============================] - 1s 22ms/step - loss: 1.0048e-04 - val_loss: 6.8886e-05
Epoch 18/20
38/38 [==============================] - 1s 22ms/step - loss: 9.6065e-05 - val_loss: 8.6196e-05
Epoch 19/20
38/38 [==============================] - 1s 23ms/step - loss: 9.3305e-05 - val_loss: 5.2718e-05
Epoch 20/20
38/38 [==============================] - 1s 22ms/step - loss: 9.7125e-05 - val_loss: 5.5418e-05

The final Original & Predicted dataframe for stock GOOG:
            Date     Close  Predictions
1     2012-01-13  0.025463     0.024983
2     2012-01-17  0.026849     0.023164
3     2012-01-18  0.028521     0.024568
4     2012-01-19  0.031093     0.026262
5     2012-01-20  0.010403     0.028868
...          ...       ...          ...
2154  2020-08-05  0.926450     0.903929
2155  2020-08-06  0.946985     0.910211
2156  2020-08-07  0.942636     0.929433
2157  2020-08-10  0.943884     0.925367
2158  2020-08-11  0.931652     0.926534

[2158 rows x 3 columns]
==============Making Predictions for stock sp500 ================
My Training shape: (1510, 1, 1),(1510,)  and   Testing shape: (648, 1, 1),(648,)
Model: "model_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_9 (InputLayer)         [(None, 1, 1)]            0         
_________________________________________________________________
lstm_24 (LSTM)               (None, 1, 300)            362400    
_________________________________________________________________
dropout_6 (Dropout)          (None, 1, 300)            0         
_________________________________________________________________
lstm_25 (LSTM)               (None, 1, 300)            721200    
_________________________________________________________________
dropout_7 (Dropout)          (None, 1, 300)            0         
_________________________________________________________________
lstm_26 (LSTM)               (None, 1, 300)            721200    
_________________________________________________________________
dense_8 (Dense)              (None, 1, 1)              301       
=================================================================
Total params: 1,805,101
Trainable params: 1,805,101
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
38/38 [==============================] - 7s 53ms/step - loss: 0.0461 - val_loss: 0.0186
Epoch 2/20
38/38 [==============================] - 1s 22ms/step - loss: 0.0048 - val_loss: 0.0020
Epoch 3/20
38/38 [==============================] - 1s 22ms/step - loss: 3.1867e-04 - val_loss: 1.6665e-04
Epoch 4/20
38/38 [==============================] - 1s 21ms/step - loss: 2.0958e-04 - val_loss: 2.7651e-05
Epoch 5/20
38/38 [==============================] - 1s 22ms/step - loss: 2.3185e-04 - val_loss: 3.3960e-05
Epoch 6/20
38/38 [==============================] - 1s 21ms/step - loss: 2.1717e-04 - val_loss: 2.7252e-05
Epoch 7/20
38/38 [==============================] - 1s 22ms/step - loss: 1.9606e-04 - val_loss: 3.3294e-05
Epoch 8/20
38/38 [==============================] - 1s 21ms/step - loss: 2.0364e-04 - val_loss: 4.4259e-05
Epoch 9/20
38/38 [==============================] - 1s 21ms/step - loss: 1.6826e-04 - val_loss: 4.1135e-05
Epoch 10/20
38/38 [==============================] - 1s 21ms/step - loss: 1.8765e-04 - val_loss: 6.3658e-05
Epoch 11/20
38/38 [==============================] - 1s 22ms/step - loss: 1.7101e-04 - val_loss: 6.0340e-05
Epoch 12/20
38/38 [==============================] - 1s 28ms/step - loss: 1.8482e-04 - val_loss: 1.2504e-04
Epoch 13/20
38/38 [==============================] - 1s 21ms/step - loss: 1.6124e-04 - val_loss: 3.3477e-05
Epoch 14/20
38/38 [==============================] - 1s 21ms/step - loss: 1.8826e-04 - val_loss: 3.4237e-05
Epoch 15/20
38/38 [==============================] - 1s 22ms/step - loss: 1.6508e-04 - val_loss: 3.4277e-05
Epoch 16/20
38/38 [==============================] - 1s 21ms/step - loss: 1.5353e-04 - val_loss: 2.6226e-05
Epoch 17/20
38/38 [==============================] - 1s 21ms/step - loss: 1.3927e-04 - val_loss: 5.2711e-05
Epoch 18/20
38/38 [==============================] - 1s 22ms/step - loss: 1.4245e-04 - val_loss: 6.7928e-05
Epoch 19/20
38/38 [==============================] - 1s 21ms/step - loss: 1.3910e-04 - val_loss: 4.1917e-05
Epoch 20/20
38/38 [==============================] - 1s 22ms/step - loss: 1.7458e-04 - val_loss: 2.1056e-04

The final Original & Predicted dataframe for stock sp500:
            Date     Close  Predictions
1     2012-01-13  0.005242     0.005224
2     2012-01-17  0.007414     0.002185
3     2012-01-18  0.014231     0.004356
4     2012-01-19  0.017295     0.011168
5     2012-01-20  0.017713     0.014230
...          ...       ...          ...
2154  2020-08-05  0.972307     0.922884
2155  2020-08-06  0.982453     0.932009
2156  2020-08-07  0.983459     0.941174
2157  2020-08-10  0.987819     0.942081
2158  2020-08-11  0.975115     0.946013

[2158 rows x 3 columns]
In [ ]:
 
In [ ]:
 
In [ ]: